data_depression <- read_csv("../data/depression - Fried 2017/MatrixB.csv") #Load dat for estimating Jaccard index (no difference between specific and compound symptoms)
data_anxiety <- read_csv("../data/anxiety - Wall & Lee 2022/anxiety_wall_lee_jaccard.csv")
data_psychosis <- read_csv("../data/psychosis - Bernardin et al. 2023/psychosis - Bernardin et al. 2023.csv") |>
janitor::clean_names() %>%
mutate(across(everything(), ~ ifelse(. == 2, 1, .)))
data_hypomania <- read_csv("../data/hypomania - Chrobak et al. 2018/hypomania.csv")
data_eatingdisorders <- read_csv("../data/eating disorders - Christensen Pacella et al. 2024/MatrixB_EDMeasures_5-26-2023.csv")
data_youth_ocd <- read_csv("../data/ocd - Visiontay et al. 2019/youth_ocd.csv") |>
janitor::clean_names() %>%
mutate(across(everything(), ~ ifelse(. == 2, 1, .)))
# youth depression
# youth anxietyReal, R., & Vargas, J. M. (1996). The probabilistic basis of Jaccard’s index of similarity. Systematic Biology, 45(3), 380-385. DOI:10.1093/sysbio/45.3.380
escalc_jaccard <- function(.data, domain = NA){
data_mat_t <- .data |>
as.matrix() |>
t()
jaccard_diff <- proxy::dist(data_mat_t, method = "Jaccard") |> as.matrix()
jaccard_sim <- 1 - jaccard_diff
# calculate SEs for each Jaccard similarity
var_names <- rownames(data_mat_t)
n_vars <- length(var_names)
# initialize the SE matrix
se_matrix <- matrix(NA, nrow = n_vars, ncol = n_vars)
rownames(se_matrix) <- var_names
colnames(se_matrix) <- var_names
# compute SEs
for (i in 1:n_vars) {
for (j in 1:n_vars) {
x <- data_mat_t[i, ]
y <- data_mat_t[j, ]
a <- sum(x == 1 & y == 1)
b <- sum(x == 1 & y == 0)
c <- sum(x == 0 & y == 1)
n <- a + b + c
J <- a / n
SE <- sqrt(J * (1 - J) / n)
se_matrix[i, j] <- SE
}
}
# logit transform
logit_jaccard_sim <- qlogis(jaccard_sim)
# calculate SEs on the logit scale using the delta method
# SE_logit_p = SE_p / [p * (1 - p)]
se_logit_jaccard_sim <- se_matrix / (jaccard_sim * (1 - jaccard_sim))
# replace infinite values resulting from division by zero
se_logit_jaccard_sim[!is.finite(se_logit_jaccard_sim)] <- NA
# create variable pair names
m <- jaccard_sim
## get indices and values
indices <- which(lower.tri(m), arr.ind = TRUE)
vec <- m[lower.tri(m)]
## create variable pair names
variable_pairs <- paste(rownames(m)[indices[,1]], colnames(m)[indices[,2]], sep = " - ")
# create a data frame for effect sizes
dat_es <- tibble(domain = domain,
yi = logit_jaccard_sim[lower.tri(logit_jaccard_sim)],
sei = se_logit_jaccard_sim[lower.tri(se_logit_jaccard_sim)],
scale1 = rownames(m)[indices[,1]],
scale2 = colnames(m)[indices[,2]],
pair = variable_pairs)
return(dat_es)
}
rma_jaccard_three_level <- function(effect_sizes, title = "Scales"){
# fit a three-level meta-analysis model accounting for shared scales
fit <- rma.mv(yi = yi,
V = sei^2,
random = list(~ 1 | scale1, ~ 1 | scale2),
data = effect_sizes,
method = "REML")
# create a forest plot
plot_forest <-
forest(fit,
transf = plogis,
slab = effect_sizes$pair,
xlim = c(-0.8, 1.7),
at = c(0, .2, .4, .6, .8, 1),
efac = 0.2,
addpred = TRUE,
xlab = "Jaccard Similarity",
header = c(title, "J [95% CI]"),
refline = NA)
# table of results
results_predictions <- fit |>
predict() |>
as_tibble() |>
dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower = pi.lb, pi_upper = pi.ub) |>
mutate_all(plogis) |>
mutate_all(janitor::round_half_up, digits = 2) |>
mutate(domain = title) |>
relocate(domain, .before = J)
return(list(fit = fit,
predictions = results_predictions,
forest = plot_forest))
}es_depression <- data_depression |>
escalc_jaccard(domain = "Depression")
res_depression <- es_depression |>
rma_jaccard_three_level(title = "Depression scales")| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| Depression scales | 0.43 | 0.35 | 0.52 | 0.24 | 0.64 |
es_anxiety <- data_anxiety |>
escalc_jaccard(domain = "Anxiety")
res_anxiety <- es_anxiety |>
rma_jaccard_three_level(title = "Anxiety scales")| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| Anxiety scales | 0.27 | 0.22 | 0.32 | 0.14 | 0.44 |
es_psychosis <- data_psychosis |>
escalc_jaccard(domain = "Psychosis")
res_psychosis <- es_psychosis |>
rma_jaccard_three_level(title = "Psychosis scales")| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| Psychosis scales | 0.21 | 0.16 | 0.26 | 0.08 | 0.43 |
es_hypomania <- data_hypomania |>
escalc_jaccard(domain = "(Hypo)mania")
res_hypomania <- es_hypomania |>
rma_jaccard_three_level(title = "(Hypo)mania scales")| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| (Hypo)mania scales | 0.4 | 0.31 | 0.5 | 0.2 | 0.64 |
es_youth_ocd <- data_youth_ocd |>
escalc_jaccard(domain = "Youth OCD")
res_youth_ocd <- es_youth_ocd |>
rma_jaccard_three_level(title = "Youth OCD scales")| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| Youth OCD scales | 0.38 | 0.32 | 0.44 | 0.32 | 0.44 |
es_combined <- bind_rows(
es_depression,
es_anxiety,
es_psychosis,
es_hypomania,
es_eatingdisorders,
es_youth_ocd
)
rma_jaccard_four_level <- function(effect_sizes){
# Fit a four-level meta-analysis model accounting for shared scales
fit <- rma.mv(yi = yi,
V = sei^2,
random = list(~ 1 | domain/scale1, ~ 1 | domain/scale2),
data = effect_sizes,
method = "REML")
# Create a forest plot
plot_forest <-
forest(fit,
transf = plogis,
slab = effect_sizes$pair,
xlim = c(-0.8, 1.7),
at = c(0, .2, .4, .6, .8, 1),
#cex = 0.4,
efac = 0.2,
addpred = TRUE,
xlab = "Jaccard Similarity",
header = c("Scales", "J [95% CI]"),
refline = NA)
results_predictions <- fit |>
predict() |>
as_tibble() |>
dplyr::select(J = pred, ci_lower = ci.lb, ci_upper = ci.ub, pi_lower = pi.lb, pi_upper = pi.ub) |>
mutate_all(plogis) |>
mutate_all(janitor::round_half_up, digits = 2) |>
mutate(domain = "4-level RE") |>
relocate(domain, .before = J)
return(list(fit = fit,
predictions = results_predictions,
forest = plot_forest))
}
res_combined <- rma_jaccard_four_level(es_combined)| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| 4-level RE | 0.3 | 0.23 | 0.39 | 0.11 | 0.6 |
#res_combined$fit
# Combine estimates and labels into a data frame for easy viewing
tibble(label = c("domain", "domain/scale1", "domain again", "domain/scale2")) |>
mutate(logit_sigma = res_combined$fit$sigma2,
sigma = round_half_up(plogis(logit_sigma), 2)) |>
filter(label != "domain again") |>
select(-logit_sigma) |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| label | sigma |
|---|---|
| domain | 0.52 |
| domain/scale1 | 0.53 |
| domain/scale2 | 0.51 |
3- and 4-level meta
predictions_combined_by_domain <- bind_rows(
res_depression$predictions,
res_anxiety$predictions,
res_psychosis$predictions,
res_hypomania$predictions,
res_youth_ocd$predictions,
res_eatingdisorders$predictions,
res_combined$predictions
) |>
mutate(domain = fct_rev(fct_relevel(domain,
"Depression scales",
"Anxiety scales",
"Psychosis scales",
"(Hypo)mania scales",
"Youth OCD scales",
"Eating disorders scales",
"4-level RE")))
predictions_combined_by_domain |>
kable(align = "r") |>
kable_classic(full_width = FALSE)| domain | J | ci_lower | ci_upper | pi_lower | pi_upper |
|---|---|---|---|---|---|
| Depression scales | 0.43 | 0.35 | 0.52 | 0.24 | 0.64 |
| Anxiety scales | 0.27 | 0.22 | 0.32 | 0.14 | 0.44 |
| Psychosis scales | 0.21 | 0.16 | 0.26 | 0.08 | 0.43 |
| (Hypo)mania scales | 0.40 | 0.31 | 0.50 | 0.20 | 0.64 |
| Youth OCD scales | 0.38 | 0.32 | 0.44 | 0.32 | 0.44 |
| Eating disorders scales | 0.20 | 0.14 | 0.28 | 0.09 | 0.39 |
| 4-level RE | 0.30 | 0.23 | 0.39 | 0.11 | 0.60 |
ggplot(predictions_combined_by_domain, aes(J, domain)) +
# geom_errorbarh(aes(xmin = pi_lower, xmax = pi_upper), linetype = "dotted", height = 0.26) +
# geom_errorbarh(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", height = 0.26, size = 0.85) +
geom_linerange(aes(xmin = pi_lower, xmax = pi_upper), linetype = "solid", size = 0.45) +
geom_linerange(aes(xmin = ci_lower, xmax = ci_upper), linetype = "solid", size = 0.90) +
geom_point(size = 2.5) +
theme_linedraw() +
scale_x_continuous(limits = c(0,1), breaks = scales::breaks_pretty(n = 5), name = "Jaccard Similarity") +
ylab("")## R version 4.3.3 (2024-02-29)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.4.0 knitr_1.48 scales_1.3.0
## [4] ggstance_0.3.7 ggplot2_3.5.1 metafor_4.6-0
## [7] numDeriv_2016.8-1.1 metadat_1.2-0 Matrix_1.6-5
## [10] janitor_2.2.0 proxy_0.4-27 forcats_1.0.0
## [13] tibble_3.2.1 readr_2.1.5 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.5 xfun_0.46 bslib_0.7.0 lattice_0.22-6
## [5] mathjaxr_1.6-0 tzdb_0.4.0 vctrs_0.6.5 tools_4.3.3
## [9] generics_0.1.3 parallel_4.3.3 fansi_1.0.6 highr_0.11
## [13] pkgconfig_2.0.3 lifecycle_1.0.4 compiler_4.3.3 farver_2.1.2
## [17] stringr_1.5.1 textshaping_0.3.7 munsell_0.5.1 snakecase_0.11.1
## [21] htmltools_0.5.8.1 sass_0.4.9 yaml_2.3.8 pillar_1.9.0
## [25] crayon_1.5.2 jquerylib_0.1.4 cachem_1.0.8 nlme_3.1-164
## [29] tidyselect_1.2.1 digest_0.6.35 stringi_1.8.4 fastmap_1.1.1
## [33] grid_4.3.3 colorspace_2.1-1 cli_3.6.3 magrittr_2.0.3
## [37] utf8_1.2.4 withr_3.0.1 bit64_4.0.5 lubridate_1.9.3
## [41] timechange_0.3.0 rmarkdown_2.26 bit_4.0.5 ragg_1.3.0
## [45] hms_1.1.3 evaluate_0.23 viridisLite_0.4.2 rlang_1.1.4
## [49] glue_1.7.0 xml2_1.3.6 svglite_2.1.3 rstudioapi_0.16.0
## [53] vroom_1.6.5 jsonlite_1.8.8 R6_2.5.1 systemfonts_1.0.6